NHANES Mortality Analysis

Code
# load packages 
library(tidyverse)
library(gtsummary)
library(gt)
library(tidymodels)
library(censored)
library(paletteer)
library(survey)
library(patchwork)
# paletteer_d("colorBlindness::PairedColor12Steps")
col1 = "#FF7F00FF"; col2 = "#19B2FFFF"

# load data 
# results from univariate
wt_single_male = readRDS(here::here("results", "metrics_wtd_100_singlevar_male.rds"))
wt_single_female = readRDS(here::here("results", "metrics_wtd_100_singlevar_female.rds"))

# results from multivariable
wt_male = readRDS(here::here("results", "metrics_wtd_100_male.rds"))
wt_female = readRDS(here::here("results", "metrics_wtd_100_female.rds"))


# covariate/pa df 
df_all = readRDS(here::here("data", "covariates_accel_mortality_df.rds"))

Loading and preparing data

See code_README.md for description of the pipeline to process the data. For this analysis, we have one estimate for each physical activity variable, along with demographic variables for all participants who received an accelerometer. We create a separate dataset for the mortality analysis restricted to just individuals with valid accelerometry data and who are between 50 and 79 years old, and who are not missing any covariate data that will be used in the models. Finally, we winsorize the PA variables at the 99th percentile, to remove any extremely high outliers.

Code
df_mortality =
  df_all %>%
  filter(num_valid_days >= 3) %>%
  filter(age_in_years_at_screening >= 50 &
           age_in_years_at_screening < 80) 

df_accel = 
  df_all %>% 
  filter(num_valid_days >= 3 & age_in_years_at_screening >= 18)

df_mortality_win =
  df_mortality %>%
  filter(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm, mean_PAXMTSM),
                ~!is.na(.x))) %>%
  mutate(event_time = permth_exm / 12) %>% 
  ungroup() %>%
  mutate(across(c(contains("mean"), contains("peak")), ~DescTools::Winsorize(.x, probs = c(0, 0.99)))) 

df_accel_win =
  df_accel %>%
  mutate(across(c(contains("mean"), contains("peak")), ~DescTools::Winsorize(.x, probs = c(0, 0.99)))) 
Code
# TO DO: check these models 
# sample sizes for exclusion diagram

df_all %>% 
  count(data_release_cycle)

df_all %>% 
  group_by(data_release_cycle) %>% 
  count(has_accel)

acc_subset = df_all %>% 
  filter(has_accel)
acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count()

acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count(valid_accel)

acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count(inclusion_type)

acc_subset %>% 
  filter(valid_accel) %>% 
  mutate(adult = age_in_years_at_screening >= 18) %>% 
  group_by(data_release_cycle) %>% 
  count(adult)

acc_subset %>% 
  filter(valid_accel) %>% 
  mutate(b50 = age_in_years_at_screening < 50,
         o80 = age_in_years_at_screening >= 80) %>% 
  group_by(data_release_cycle) %>% 
  count(b50, o80)

acc_subset %>% 
  filter(num_num_valid_days >= 3 & age_in_years_at_screening >= 50 & age_in_years_at_screening < 80) %>% 
  filter(!(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm),
                ~!is.na(.x)))) %>% 
  group_by(data_release_cycle) %>%
  count()

4351+269+34
6497-(4351+269+34)
3913+264+48

6020-4225

Figures

Distribution of PA Variables

Probably supplemental

Code
df_accel %>%
  select(contains("steps") & contains("mean"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>% 
  ggplot(aes(x = value / 1000, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Steps x 1000", y = "Density", title = "Distribution of Mean Step Counts",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(contains("steps") & contains("mean"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>% 
  ggplot(aes(x = value / 1000, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Steps x 1000", y = "Density", title = "Distribution of Winsorized Mean Step Counts",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(!contains("steps") & contains("mean"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Value", y = "Density", title = "Distribution of Mean PA Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(!contains("steps") & contains("mean"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Value", y = "Density", title = "Distribution of Winsorized Mean PA Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(contains("steps") & contains("peak1"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak 1 Min Cadence (steps/min)", y = "Density", title = "Distribution of Peak 1 Min Cadence",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(contains("steps") & contains("peak1"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak 1 Min Cadence (steps/min)", y = "Density", title = "Distribution of Winsorized Peak 1 Min Cadence",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(contains("steps") & contains("peak30"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak 30 Min Cadence (steps/min)", y = "Density", title = "Distribution of Peak 30 Min Cadence",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(contains("steps") & contains("peak30"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak 30 Min Cadence (steps/min)", y = "Density", title = "Distribution of Winsorized Peak 30 Min Cadence",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(!contains("steps") & contains("peak1"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak Value", y = "Density", title = "Distribution of Peak 1 Min Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(!contains("steps") & contains("peak1"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak Value", y = "Density", title = "Distribution of Winsorized Peak 1 Min Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(!contains("steps") & contains("peak30"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak Value", y = "Density", title = "Distribution of Peak 30 Min Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(!contains("steps") & contains("peak30"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak Value", y = "Density", title = "Distribution of Winsorized Peak 30 Min Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Mean Step Counts and Peak Cadence by Age and Sex

Tent. Figure 1

Code
df_means = df_accel %>%
  filter(age_in_years_at_screening >= 18 & age_in_years_at_screening < 80) %>% 
  group_by(data_release_cycle) %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>% 
  ungroup() 

# survey_design <- svydesign(ids = ~1, weights = ~weight, data = df)
survey_design = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df_means,
  nest = TRUE
)

# Calculate mean estimate by age
calc_by_age =
  function(var, df) {
    # var = "mean_oak_steps_80"
    formula = as.formula(paste("~", var))
    mean_by_age_gender = svyby(formula,
                               ~ age_in_years_at_screening + gender,
                               survey_design,
                               svymean) %>%
      rename(mean = contains(var)) %>%
      mutate(metric = var)
  }

means_df = 
  map_dfr(.x = df_means %>% select(contains("mean") | contains("peak")) %>% colnames(),
          .f = calc_by_age, df = df)

models = means_df %>%
   mutate(lb = mean - 1.96 * se,
         ub = mean + 1.96 * se) %>% 
        tidyr::nest(data = -c(metric, gender)) %>%
        dplyr::mutate(
                # Perform loess calculation on each group
                m = purrr::map(data, loess,
                               formula = mean ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_mean = purrr::map(m, `[[`, "fitted"),
                l = purrr::map(data, loess,
                               formula = lb ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_lb = purrr::map(l, `[[`, "fitted"),
                u = purrr::map(data, loess,
                               formula = ub ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_ub = purrr::map(u, `[[`, "fitted")
        )

# Apply fitted y's as a new column
results = models %>%
        dplyr::select(-m, -l, -u) %>%
        tidyr::unnest(cols = c(data, contains("fitted")))

results %>% 
  filter(grepl("step", metric) & grepl("mean", metric)) %>%
  mutate(across(contains("fitted"), ~.x / 1000),
                metric = factor(metric, levels = c("mean_acti_steps", 
                                            "mean_adept_steps",
                                            "mean_oak_steps_80",
                                            "mean_vs_steps_15",
                                            "mean_vs_revised_steps_15",
                                            "mean_scssl_steps",
                                            "mean_scrf_steps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.", "Stepcount SSL", "Stepcount RF"))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub, color = gender, fill = gender)) +
  facet_grid(. ~ metric) +
  geom_line() + 
  geom_ribbon(alpha = .2, linetype = 0) +
  scale_color_manual(values = c(col1, col2), name = "")+
  scale_fill_manual(values = c(col1, col2), name = "")+
  theme_light() +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  labs(x = "Age (yrs)", y = "Mean Daily Steps x 1000",
       title = "Smoothed Survey Weighted Mean Daily Steps by Age and Sex")+
  scale_y_continuous(breaks=seq(0,16,2))

Code
p1 = results %>% 
  filter(grepl("step", metric)) %>%
  mutate(across(contains("fitted"), ~.x / 1000),
                metric = factor(metric, levels = c("mean_acti_steps", 
                                            "mean_adept_steps",
                                            "mean_oak_steps_80",
                                            "mean_vs_steps_15",
                                            "mean_vs_revised_steps_15",
                                            "mean_scssl_steps",
                                            "mean_scrf_steps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.", "Stepcount SSL", "Stepcount RF"))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub, color = gender, fill = gender)) +
  facet_grid(. ~ metric) +
  geom_line() + 
  geom_ribbon(alpha = .2, linetype = 0) +
  scale_color_manual(values = c(col1, col2), name = "")+
  scale_fill_manual(values = c(col1, col2), name = "")+
  theme_light() +
  theme(legend.position = "none",
        legend.title = element_blank()) +
  labs(x = "Age (yrs)", y = "Mean Daily Steps x 1000",
       title = "Smoothed Survey Weighted Mean Daily Steps by Age and Sex")+
  scale_y_continuous(breaks=seq(0,16,2))

results %>% 
  filter(grepl("mean", metric) & !grepl("step", metric)) %>%
  mutate(across(contains("fitted"), ~.x / 1000),
                metric = factor(metric, levels = c("mean_AC", "mean_log10AC",
                                                   "mean_PAXMTSM","mean_log10PAXMTSM"),
                         labels = c("AC", "log10 AC", "MIMS", "log10 MIMS"))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub, color = gender, fill = gender)) +
  facet_wrap(. ~ metric, scales = "free", nrow = 1) +
  geom_line() + 
  geom_ribbon(alpha = .2, linetype = 0) +
  scale_color_manual(values = c(col1, col2), name = "")+
  scale_fill_manual(values = c(col1, col2), name = "")+
  theme_light() +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  labs(x = "Age (yrs)", y = "Mean Variable x 1000",
       title = "Smoothed Survey Weighted Mean Physical Activity by Age and Sex")

Code
results %>% 
  filter(grepl("peak", metric) & grepl("step", metric)) %>% 
  mutate(type = sub("_.*", "", metric),
         method = sub("^[^_]*_", "", metric)) %>% 
  filter(grepl("step", metric)) %>%
  mutate(method = factor(method, levels = c("acti_steps", 
                                            "adept_steps",
                                            "oak_steps_80",
                                            "vs_steps_15",
                                            "vs_revised_steps_15",
                                            "scssl_steps",
                                            "scrf_steps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense revised", "Stepcount SSL", "Stepcount RF"))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub)) +
  facet_grid(. ~ method) +
  geom_line(aes(linetype = type, color = gender)) + 
  geom_ribbon(alpha = .2, aes(fill = gender, linetype = type), color = NA) +
  scale_color_manual(values = c(col1, col2), name = "")+
  scale_fill_manual(values = c(col1, col2), name = "")+
  theme_light() +
  scale_linetype_discrete(labels = c("1 Minute", "30 Minute")) + 
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  labs(x = "Age (yrs)", y = "Peak Cadence (steps/min)",
       title = "Smoothed Survey Weighted Peak Cadence by Age and Sex")+
    scale_y_continuous(breaks=seq(20, 120,15))

Code
results %>% 
  filter(grepl("peak", metric) & !grepl("step", metric)) %>% 
  mutate(type = sub("_.*", "", metric),
         method = sub("^[^_]*_", "", metric)) %>% 
  mutate(method = factor(method, levels = c("AC", "log10AC",
                                                   "PAXMTSM","log10PAXMTSM"),
                         labels = c("AC", "log10 AC", "MIMS", "log10 MIMS"))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub)) +
  facet_wrap(. ~ method, scales="free") +
  geom_line(aes(linetype = type, color = gender)) + 
  geom_ribbon(alpha = .2, aes(fill = gender, linetype = type), color = NA) +
  scale_color_manual(values = c(col1, col2), name = "")+
  scale_fill_manual(values = c(col1, col2), name = "")+
  theme_light() +
  scale_linetype_discrete(labels = c("1 Minute", "30 Minute")) + 
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  labs(x = "Age (yrs)", y = "Peak Value",
       title = "Smoothed Survey Weighted Peak Variables by Age and Sex")

Correlations between Step Counts and AC/MIMS

Figure 2?

Code
cor_mat = 
  df_accel %>% select(contains("mean")) %>%
  cor(., use = "complete", method = "spearman") 
pvals = df_accel %>% select(contains("mean")) %>%
  rstatix::cor_pmat(.) %>% 
  select(-rowname) %>% 
  as.matrix()
colnames(cor_mat) = rownames(cor_mat) =colnames(pvals) = rownames(pvals) = c("AC", "MIMS", "Actilife", "ADEPT", "log10 AC", "log10 MIMS", "Oak", "Sc. RF", "Sc. SSL", "Vs rev.", "Vs")


corrplot::corrplot(cor_mat,  
         method="circle", 
         type="upper", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         p.mat = pvals,
         col.lim=c(0.2,1),
         sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
        diag = FALSE,
         order = "AOE",
        title = "Spearman Correlations",
        addCoef.col = 'black') 

Code
# svg(here::here("manuscript", "figures", "correlation.svg"), width = 8, height = 8)
Code
# correlations by sex 

cor_mat = 
  df_pa %>% 
  filter(gender == "Male") %>% 
  select(contains("mean")) %>%
  cor(., use = "complete", method = "spearman") 
pvals = df_pa %>% 
  filter(gender == "Male") %>% 
  select(contains("mean")) %>%
  rstatix::cor_pmat(.) %>% 
  select(-rowname) %>% 
  as.matrix()
colnames(cor_mat) = rownames(cor_mat) =colnames(pvals) = rownames(pvals) = c("AC", "Actilife", "ADEPT", "log10 AC", "log10 MIMS", "Oak", "MIMS", "Sc. RF", "Sc. SSL", "Vs rev.", "Vs")


corrplot::corrplot(cor_mat,  
         method="circle", 
         type="upper", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         p.mat = pvals,
         col.lim=c(0.2,1),
         sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
        diag = FALSE,
         order = "AOE",
        title = "Spearman Correlations, Male",
        addCoef.col = 'black') 

cor_mat = 
  df_pa %>% 
  filter(gender == "Female") %>% 
  select(contains("mean")) %>%
  cor(., use = "complete", method = "spearman") 
pvals = df_pa %>% 
  filter(gender == "Female") %>% 
  select(contains("mean")) %>%
  rstatix::cor_pmat(.) %>% 
  select(-rowname) %>% 
  as.matrix()
colnames(cor_mat) = rownames(cor_mat) =colnames(pvals) = rownames(pvals) = c("AC", "Actilife", "ADEPT", "log10 AC", "log10 MIMS", "Oak", "MIMS", "Sc. RF", "Sc. SSL", "Vs rev.", "Vs")


corrplot::corrplot(cor_mat,  
         method="circle", 
         type="upper", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         p.mat = pvals,
         col.lim=c(0.2,1),
         sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
        diag = FALSE,
         order = "AOE",
        title = "Spearman Correlations, Female",
        addCoef.col = 'black') 
cor_mat = 
  df_pa %>% 
  filter(gender == "Male") %>% 
  select(contains("mean")) %>%
  cor(., use = "complete", method = "pearson") 
pvals = df_pa %>% 
  filter(gender == "Male") %>% 
  select(contains("mean")) %>%
  rstatix::cor_pmat(.) %>% 
  select(-rowname) %>% 
  as.matrix()
colnames(cor_mat) = rownames(cor_mat) =colnames(pvals) = rownames(pvals) = c("AC", "Actilife", "ADEPT", "log10 AC", "log10 MIMS", "Oak", "MIMS", "Sc. RF", "Sc. SSL", "Vs rev.", "Vs")


corrplot::corrplot(cor_mat,  
         method="circle", 
         type="upper", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         p.mat = pvals,
         col.lim=c(0.2,1),
         sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
        diag = FALSE,
         order = "AOE",
        title = "Pearson Correlations, Male",
        addCoef.col = 'black') 

cor_mat = 
  df_pa %>% 
  filter(gender == "Female") %>% 
  select(contains("mean")) %>%
  cor(., use = "complete", method = "pearson") 
pvals = df_pa %>% 
  filter(gender == "Female") %>% 
  select(contains("mean")) %>%
  rstatix::cor_pmat(.) %>% 
  select(-rowname) %>% 
  as.matrix()
colnames(cor_mat) = rownames(cor_mat) =colnames(pvals) = rownames(pvals) = c("AC", "Actilife", "ADEPT", "log10 AC", "log10 MIMS", "Oak", "MIMS", "Sc. RF", "Sc. SSL", "Vs rev.", "Vs")


corrplot::corrplot(cor_mat,  
         method="circle", 
         type="upper", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         p.mat = pvals,
         col.lim=c(0.2,1),
         sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
        diag = FALSE,
         order = "AOE",
        title = "Pearson Correlations, Female",
        addCoef.col = 'black') 

Single variable mortality prediction

Figure 3?

Note - I’ll make these axes more legible when I can stretch out the graphs a bit more (not in rmd)

Code
# make label df 
var_labels = 
  tibble(names = 
           unique(wt_single_male$variable),
         labels = c("Age at screening", 
                    "BMI Category", "Race/ethnicity",
                    "Diabetes", "Education level", 
                    "CHF", "CHD", "Heart attack", 
                    "Stroke", "Cancer", "Alcohol use",
                    "Smoking status", "Mobility problem", 
                    "Self-reported health", "AC", "MIMS",
                     "Actilife steps", "ADEPT steps",
                    "log10 AC", "log10 MIMS", "Oak steps",
                     "Sc. RF steps", "Sc. SSL steps",
                    "Verisense rev. steps", "Verisense steps",
                    "Peak1 AC", "Peak1 MIMS",
                     "Peak1 Actilife steps", "Peak1 ADEPT steps",
                    "Peak1 log10 AC", "Peak1 log10 MIMS", "Peak1 Oak steps",
                     "Peak1 Sc. RF steps", "Peak1 Sc. SSL steps",
                    "Peak1 Verisense rev. steps", "Peak1 Verisense steps",
                     "Peak30 AC", "Peak30 MIMS",
                     "Peak30 Actilife steps", "Peak30 ADEPT steps",
                    "Peak30 log10 AC", "Peak30 log10 MIMS", "Peak30 Oak steps",
                     "Peak30 Sc. RF steps", "Peak30 Sc. SSL steps",
                    "Peak30 Verisense rev. steps", "Peak30 Verisense steps"))

# paletteer_d("ggthemes::colorblind")
m = wt_single_male %>% 
  filter(!grepl("peak", variable)) %>% 
  mutate(var_group = case_when(
    grepl("steps", variable) ~ "Step variable",
    grepl("mean", variable) ~ "Non-step accelerometry variable",
    TRUE ~ "Non-accelerometry variable"
  )) %>% 
  # mutate(variable = sub(".*mean\\_", "", variable)) %>% 
  group_by(variable, var_group) %>% 
  summarize(mean = mean(concordance),
            sd = sd(concordance),
            se = sd(concordance)/sqrt(n())) %>% 
  mutate(ci_low = mean - 1.96*se,
         ci_high = mean + 1.96*se) %>% 
  ungroup() %>% 
  left_join(var_labels, by = c("variable" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, mean),
         grp = "Male") %>% 
  ggplot(aes(y = labels, x = mean, xmin = ci_low, xmax = ci_high, color = var_group))+
  geom_point() + 
  facet_wrap(.~grp) +
  geom_errorbarh() + 
  theme_bw() + 
  scale_color_manual(values = c("#009E73FF", "#0072B2FF", "#CC79A7FF"), name = "")+
  scale_x_continuous(limits=c(0.49, 0.7835), breaks=seq(0.45, 0.785, .05))+
  theme(legend.position = c(.3, .75),
        legend.title = element_blank(),
      axis.text.y = element_text(size = 10),
      strip.text = element_text(size = 14))+
  labs(x = "Mean 100x 10-fold Cross-Validated Concordance", y = "")
  
f = wt_single_female %>% 
  filter(!grepl("peak", variable)) %>% 
  mutate(var_group = case_when(
    grepl("steps", variable) ~ "Step variable",
    grepl("mean", variable) ~ "Non-step accelerometry variable",
    TRUE ~ "Non-accelerometry variable"
  )) %>% 
  # mutate(variable = sub(".*mean\\_", "", variable)) %>% 
  group_by(variable, var_group) %>% 
  summarize(mean = mean(concordance),
            sd = sd(concordance),
            se = sd(concordance)/sqrt(n())) %>% 
  mutate(ci_low = mean - 1.96*se,
         ci_high = mean + 1.96*se) %>% 
  ungroup() %>% 
  left_join(var_labels, by = c("variable" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, mean),
         grp = "Female") %>% 
  ggplot(aes(y = labels, x = mean, xmin = ci_low, xmax = ci_high, color = var_group))+
  geom_point() + 
  geom_errorbarh() + 
  theme_bw() + 
  scale_color_manual(values = c("#009E73FF", "#0072B2FF", "#CC79A7FF"), name = "")+
  scale_x_continuous(limits=c(0.49, 0.7835), breaks=seq(0.45, 0.785, .05))+
  facet_wrap(.~grp) + 
  theme(legend.position = "none",
    # legend.position = c(.3, .8),
        legend.title = element_blank(),
        axis.text.y = element_text(size = 10),
    strip.text = element_text(size = 14))+
  labs(x = "", y = "")

f / m

Code
# ggsave(here::here("manuscript", "figures", "single_concordance.svg"), width = 8, height = 8)

Comparison with cadence estimates

Supplement?

Code
m = wt_single_male %>% 
  filter(grepl("peak", variable) | grepl("steps", variable)) %>% 
  mutate(var_group = factor(case_when(
    grepl("peak1", variable) ~ "Peak 1-min variable",
    grepl("peak30", variable) ~ "Peak 30-min variable",
    TRUE ~ "Mean daily total variable"
  ), levels = c("Peak 1-min variable", "Peak 30-min variable", "Mean daily total variable"))) %>% 
  group_by(variable, var_group) %>% 
  summarize(mean = mean(concordance),
            sd = sd(concordance),
            se = sd(concordance)/sqrt(n())) %>% 
  mutate(ci_low = mean - 1.96*se,
         ci_high = mean + 1.96*se) %>% 
  ungroup() %>% 
  left_join(var_labels, by = c("variable" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, mean),
         grp = "Male") %>% 
  ggplot(aes(y = labels, x = mean, xmin = ci_low, xmax = ci_high, color = var_group))+
  geom_point() + 
  geom_errorbarh() + 
  theme_bw() + 
  facet_wrap(.~grp) +
  scale_color_manual(values = c("#E69F00FF", "#D55E00FF", "#56B4E9FF"), name = "")+
  scale_x_continuous(limits=c(0.675, 0.783), breaks=seq(0.675, 0.775, .025))+
  theme(legend.position = c(.7, .4),
        legend.title = element_blank(),
        axis.text.y = element_text(size = 10),
        strip.text = element_text(size = 14))+
  labs(x = "Mean 100x 10-fold Cross-Validated Concordance", y = "")
  
f = wt_single_female %>% 
  filter(grepl("peak", variable) | grepl("steps", variable)) %>% 
 mutate(var_group = factor(case_when(
    grepl("peak1", variable) ~ "Peak 1-min variable",
    grepl("peak30", variable) ~ "Peak 30-min variable",
    TRUE ~ "Mean daily total variable"
  ), levels = c("Peak 1-min variable", "Peak 30-min variable", "Mean daily total variable"))) %>% 
   group_by(variable, var_group) %>% 
  summarize(mean = mean(concordance),
            sd = sd(concordance),
            se = sd(concordance)/sqrt(n())) %>% 
  mutate(ci_low = mean - 1.96*se,
         ci_high = mean + 1.96*se) %>% 
  ungroup() %>% 
  left_join(var_labels, by = c("variable" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, mean),
         grp = "Female") %>% 
  ggplot(aes(y = labels, x = mean, xmin = ci_low, xmax = ci_high, color = var_group))+
  geom_point() + 
  geom_errorbarh() + 
  theme_bw() + 
  facet_wrap(.~grp) +
  scale_color_manual(values = c("#E69F00FF", "#D55E00FF", "#56B4E9FF"), name = "")+
  scale_x_continuous(limits=c(0.675, 0.783), breaks=seq(0.675, 0.775, .025))+
  theme(legend.position = "none",
        legend.title = element_blank(),
        axis.text.y = element_text(size = 10),
        strip.text = element_text(size = 14))+
  labs(x = "Mean 100x 10-fold Cross-Validated Concordance", y = "")
  
f / m

Code
# ggsave(here::here("manuscript", "figures", "single_concordance_cadence.svg"), width = 8, height = 8)

Hazard ratio forest plots

Figure 4?

Code
pa_vars = df_mortality_win %>% 
  select(contains("mean") & contains("steps")) %>% 
  colnames()

df_mortality_win_scaled = 
  df_mortality_win %>% 
  mutate(across(c(contains("mean")), ~scale(.x)))


fit_model = function(var, df){
  
  male_df =
    df %>%
    filter(gender == "Male") %>%
    mutate(weight = full_sample_2_year_mec_exam_weight / 2, weight_norm = weight / mean(weight))
  
  female_df =
    df  %>%
    filter(gender == "Female") %>%
    mutate(weight = full_sample_2_year_mec_exam_weight / 2, weight_norm = weight / mean(weight))


  formula = as.formula(paste0("Surv(event_time, mortstat) ~", var, "+ 
      age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi +
      race_hispanic_origin +
      cat_bmi +
      cat_education +
      chf +
      chd +
      heartattack +
      stroke +
      cancer +
      bin_diabetes +
      cat_alcohol +
      cat_smoke +
      bin_mobilityproblem +
      general_health_condition"))
    mmodel = coxph(formula, data = male_df, weights = weight_norm) %>% 
      tidy() %>% 
      filter(grepl(var, term)) %>% 
      mutate(sex = "Male") 
    fmodel = coxph(formula, data = female_df, weights = weight_norm) %>% 
      tidy() %>% 
      filter(grepl(var, term)) %>% 
      mutate(sex = "Female") 
    bind_rows(mmodel, fmodel)
}


steps_res = 
  map_dfr(.x = pa_vars,
          .f = fit_model,
          df = df_mortality_win) %>% 
  mutate(hr = exp(estimate * 500),
         ci_low = exp(500*(estimate - (1.96 * std.error))),
         ci_high = exp(500*(estimate + (1.96 * std.error))))

m = 
  steps_res %>% 
  filter(sex == "Male") %>% 
  left_join(var_labels, by = c("term" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, hr, mean, .desc = TRUE),
         grp = "Male") %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1)+
  theme_bw() + 
  scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none",
        strip.text = element_text(size = 14)) + 
    scale_x_continuous(limits=c(0.725, 1), breaks=seq(0.7, 1, .05))+
  facet_wrap(.~grp)+
  geom_vline(aes(xintercept =  1), linetype = "dashed") +
  labs(x = "Adjusted HR Associated with 500-step Increase in Mean Steps per Day", y = "")

f = 
  steps_res %>% 
  filter(sex == "Female") %>% 
  left_join(var_labels, by = c("term" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, hr, mean, .desc = TRUE),
         grp = "Female") %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1)+
  theme_bw() + 
  scale_color_paletteer_d("ggthemes::few_Dark")+
    geom_vline(aes(xintercept =  1), linetype = "dashed") +
  theme(legend.position = "none",
        strip.text = element_text(size = 14)) + 
    scale_x_continuous(limits=c(0.725, 1), breaks=seq(0.7, 1, .05)) +
  facet_wrap(.~grp)+
  labs(x = "", y = "")
f / m

Code
# ggsave(here::here("manuscript", "figures", "forest_raw.svg"), width = 8, height = 8)

steps_res_scaled = 
  map_dfr(.x = c(pa_vars, "mean_PAXMTSM", "mean_AC"),
          .f = fit_model,
          df = df_mortality_win_scaled) %>% 
  mutate(hr = exp(estimate),
         ci_low = exp(estimate - (1.96 * std.error)),
         ci_high = exp(estimate + (1.96 * std.error))) %>% 
  left_join(var_labels, by = c("term" = "names"))

m = 
  steps_res_scaled %>% 
  filter(sex == "Male" & grepl("steps", term)) %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, mean, .desc = TRUE),
         labels = factor(labels),
         labels = fct_reorder(labels, hr, mean, .desc = TRUE),
         grp = "Male") %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1)+
  theme_bw() + 
  facet_wrap(.~grp) + 
  scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none",
        strip.text = element_text(size = 14)) + 
  scale_x_continuous(limits=c(0.325, 1), breaks=seq(0.35, 1, .05))+
  geom_vline(aes(xintercept = 1), linetype = "dashed")+
  labs(x = "Adjusted HR Associated with One SD Increase in Mean Steps per Day", y = "")

f = 
  steps_res_scaled %>% 
  filter(sex == "Female" & grepl("steps", term)) %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, mean, .desc = TRUE),
         labels = factor(labels),
         labels = fct_reorder(labels, hr, mean, .desc = TRUE),
         grp = "Female") %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1)+
  theme_bw() + 
  facet_wrap(.~grp) + 
  scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none",
        strip.text = element_text(size = 14)) + 
  scale_x_continuous(limits=c(0.325, 1), breaks=seq(0.35, 1, .05))+
  geom_vline(aes(xintercept = 1), linetype = "dashed")+ 
  labs(x = "", y = "")


f / m

Code
# ggsave(here::here("manuscript", "figures", "forest_scaled.svg"), width = 8, height = 8)
Code
pa_vars = df_mortality_win %>% 
  select(contains("mean") & contains("steps")) %>% 
  colnames()

df_mortality_win_scaled = 
  df_mortality_win %>% 
  mutate(across(c(contains("mean")), ~scale(.x)))


fit_model = function(var, df){
  
  male_df =
    df %>%
    filter(gender == "Male") %>%
    mutate(weight = full_sample_2_year_mec_exam_weight / 2, weight_norm = weight / mean(weight))
  
  female_df =
    df  %>%
    filter(gender == "Female") %>%
    mutate(weight = full_sample_2_year_mec_exam_weight / 2, weight_norm = weight / mean(weight))


  formula = as.formula(paste0("Surv(event_time, mortstat) ~", var, "+ 
      age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi +
      race_hispanic_origin +
      cat_bmi +
      cat_education +
      chf +
      chd +
      heartattack +
      stroke +
      cancer +
      bin_diabetes +
      cat_alcohol +
      cat_smoke +
      bin_mobilityproblem +
      general_health_condition"))
    mmodel = coxph(formula, data = male_df, weights = weight_norm) %>% 
      tidy() %>% 
      filter(grepl(var, term)) %>% 
      mutate(sex = "Male") 
    fmodel = coxph(formula, data = female_df, weights = weight_norm) %>% 
      tidy() %>% 
      filter(grepl(var, term)) %>% 
      mutate(sex = "Female") 
    bind_rows(mmodel, fmodel)
}


steps_res = 
  map_dfr(.x = pa_vars,
          .f = fit_model,
          df = df_mortality_win) %>% 
  mutate(hr = exp(estimate * 500),
         ci_low = exp(500*(estimate - (1.96 * std.error))),
         ci_high = exp(500*(estimate + (1.96 * std.error))))

m1 = 
  steps_res %>% 
  filter(sex == "Male") %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, mean, .desc = TRUE)) %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = term2, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = term2), linewidth = 1.1)+
  theme_bw() + 
  scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none") + 
    scale_x_continuous(limits=c(0.7, 1), breaks=seq(0.7, 1, .05)) +
  # facet_wrap(.~sex)+
  labs(x = "Adjusted HR Associated with 500-step Increase in Mean Steps per Day", title = "Male - Raw", y = "")

f1 = 
  steps_res %>% 
  filter(sex == "Female") %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, mean, .desc = TRUE)) %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = term2, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = term2), linewidth = 1.1)+
  theme_bw() + 
  scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none") + 
    scale_x_continuous(limits=c(0.7, 1), breaks=seq(0.7, 1, .05)) +
  # facet_wrap(.~sex)+
  labs(x = "", title = "Female - Raw", y = "")



steps_res_scaled = 
  map_dfr(.x = c(pa_vars, "mean_PAXMTSM", "mean_AC"),
          .f = fit_model,
          df = df_mortality_win_scaled) %>% 
  mutate(hr = exp(estimate),
         ci_low = exp(estimate - (1.96 * std.error)),
         ci_high = exp(estimate + (1.96 * std.error)))

m = 
  steps_res_scaled %>% 
  filter(sex == "Male" & grepl("steps", term)) %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, mean, .desc = TRUE)) %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = term2, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = term2), linewidth = 1.1)+
  theme_bw() + 
  scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none") + 
  scale_x_continuous(limits=c(0.3, 1), breaks=seq(0.3, 1, .05))+
  # facet_wrap(.~sex)+
  labs(x = "Adjusted HR Associated with One SD Increase in Mean Steps per Day", title = "Male - Standardized", y = "")

f = 
  steps_res_scaled %>% 
  filter(sex == "Female" & grepl("steps", term)) %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, mean, .desc = TRUE)) %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = term2, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = term2), linewidth = 1.1)+
  theme_bw() + 
  scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none") + 
  scale_x_continuous(limits=c(0.3, 1), breaks=seq(0.3, 1, .05))+
  # facet_wrap(.~sex)+
  labs(x = "", title = "Female - Standardized", y = "")


f1 + f + m1 + m + plot_layout(nrow = 2, ncol =2)

tab = steps_res_scaled %>% 
  mutate(type = "scaled") %>% 
  dplyr::select(sex, type, term, hr, ci_low, ci_high) %>% 
  bind_rows(steps_res %>% mutate(type = "raw") %>% 
  dplyr::select(sex, type, term, hr, ci_low, ci_high)) %>% 
  mutate(ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(ci_low, 3)), ", ", sprintf("%0.3f",round(ci_high, 3)), ")")) %>% 
   arrange(sex, hr) %>% 
  select(-hr, -ci_low, -ci_high) %>% 
  pivot_wider(names_from = type, values_from = ci) %>% 
  filter(grepl("steps", term)) %>% 
  kableExtra::kbl("latex", booktabs = TRUE)

Tables

Table 1: Demographics

Code
# add labels 

name_vec = colnames(df_accel)
labs = c("SEQN", "Data release cycle",
         "Interview Examination Status",
         "Sex", "Age (yrs)", "Age (mos)", "Race/ethnicity",
         "Six month time period",
         "Educ. level adults", "Marital status",
         "2 yr int weight", "2 yr exam weight", "Pseudo PSU",
         "Psueudo stratum", 
         "Annual HH income", 
         "Weight (kg)", "Height (cm)", "BMI (kg/m2)", 
         "Overweight", "Diabetes orig.", "Diabetes", "Arthritis", 
         "Coronary Heart Failure", "Congenital Heart Disease", "Angina", 
         "Heart attack", "Stroke", "Cancer", colnames(df_all)[29:33],
         "Alcohol use", "BMI Category", "Smoking status", "Mobility problem", "General health condition",
         "Eligbility", "Died by 5 years follow up", "COD", "COD Diabetes", "COD Hypertension", 
         "Person-months follow up from interview", "Person-months follow up from exam", "Eligibility category", "COD category",  "AC", "MIMS",
                     "Actilife steps", "ADEPT steps",
                    "log10 AC", "log10 MIMS", "Oak steps",
                     "Sc. RF steps", "Sc. SSL steps",
                    "Verisense rev. steps", "Verisense steps",
                    "Peak1 AC", "Peak1 MIMS",
                     "Peak1 Actilife steps", "Peak1 ADEPT steps",
                    "Peak1 log10 AC", "Peak1 log10 MIMS", "Peak1 Oak steps",
                     "Peak1 Sc. RF steps", "Peak1 Sc. SSL steps",
                    "Peak1 Verisense rev. steps", "Peak1 Verisense steps",
                     "Peak30 AC", "Peak30 MIMS",
                     "Peak30 Actilife steps", "Peak30 ADEPT steps",
                    "Peak30 log10 AC", "Peak30 log10 MIMS", "Peak30 Oak steps",
                     "Peak30 Sc. RF steps", "Peak30 Sc. SSL steps",
                    "Peak30 Verisense rev. steps", "Peak30 Verisense steps", "No. valid days", "Received accelerometer", 
         "Valid accelerometry", "Inclusion category", "Education level")



names(labs) = name_vec

df_all = 
  df_accel %>% 
  labelled::set_variable_labels(!!!labs)
# survey weighted table 
# question about this!! 
df_svy =
  df_all %>% 
  filter(has_accel) %>% 
  filter(valid_accel) %>% ### do we want to do this? 
  select(
    gender,
    age_in_years_at_screening,
    race_hispanic_origin,
    cat_education,
    cat_bmi,
    bin_diabetes,
    chf,
    chd,
    stroke,
    cat_alcohol,
    cat_smoke,
    bin_mobilityproblem,
    general_health_condition,
    mortstat,
    data_release_cycle,
    masked_variance_pseudo_psu, masked_variance_pseudo_stratum,
    full_sample_2_year_mec_exam_weight
  )  %>%
  mutate(WTMEC4YR = full_sample_2_year_mec_exam_weight/2,
         WTMEC4YR_norm = WTMEC4YR/mean(WTMEC4YR, na.rm = TRUE)) %>%
  select(-full_sample_2_year_mec_exam_weight) %>%
  svydesign(ids = ~masked_variance_pseudo_psu, weights = ~WTMEC4YR_norm, 
            strata = ~masked_variance_pseudo_stratum, data=., nest=TRUE)

df_svy %>%
  tbl_svysummary(
    by = data_release_cycle,
    include = -c(masked_variance_pseudo_psu, masked_variance_pseudo_stratum, WTMEC4YR,
                 WTMEC4YR_norm),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 2,
    missing_text = "Missing",
  ) %>%
  add_overall() %>% 
  modify_caption("Demographic Characteristics, All Adults")
Demographic Characteristics, All Adults
Characteristic Overall, N = 8,6641 7, N = 4,3671 8, N = 4,2971
Sex
    Female 4,552 (53%) 2,281 (52%) 2,271 (53%)
    Male 4,112 (47%) 2,086 (48%) 2,027 (47%)
Age (yrs) 48.01 (17.41) 47.99 (17.29) 48.04 (17.53)
Race/ethnicity
    Non-Hispanic White 5,785 (67%) 2,918 (67%) 2,867 (67%)
    Non-Hispanic Black 979 (11%) 505 (12%) 475 (11%)
    Other Race - Including Multi-Rac 640 (7.4%) 312 (7.2%) 327 (7.6%)
    Mexican American 736 (8.5%) 344 (7.9%) 393 (9.1%)
    Other Hispanic 524 (6.0%) 288 (6.6%) 236 (5.5%)
Education level
    More than HS 5,279 (63%) 2,664 (63%) 2,615 (63%)
    Less than HS 1,330 (16%) 703 (17%) 627 (15%)
    HS/HS equivalent 1,788 (21%) 877 (21%) 911 (22%)
    Missing 267 123 144
BMI Category
    Normal 2,436 (28%) 1,258 (29%) 1,178 (28%)
    Obese 3,176 (37%) 1,538 (36%) 1,639 (38%)
    Overweight 2,825 (33%) 1,445 (33%) 1,380 (32%)
    Underweight 156 (1.8%) 84 (2.0%) 71 (1.7%)
    Missing 70 41 29
Diabetes 887 (10%) 430 (9.9%) 457 (11%)
    Missing 2 0 2
Coronary Heart Failure 247 (2.9%) 135 (3.2%) 113 (2.7%)
    Missing 270 127 143
Congenital Heart Disease 316 (3.8%) 145 (3.4%) 171 (4.1%)
    Missing 283 133 150
Stroke 263 (3.1%) 140 (3.3%) 124 (3.0%)
    Missing 267 124 143
Alcohol use
    Never drinker 1,057 (12%) 477 (11%) 581 (14%)
    Former drinker 1,233 (14%) 619 (14%) 614 (14%)
    Moderate drinker 2,657 (31%) 1,401 (32%) 1,256 (29%)
    Heavy drinker 669 (7.7%) 374 (8.6%) 296 (6.9%)
    Missing alcohol 3,048 (35%) 1,496 (34%) 1,552 (36%)
Smoking status
    Never smoker 4,820 (56%) 2,351 (55%) 2,470 (57%)
    Former smoker 2,087 (24%) 1,071 (25%) 1,016 (24%)
    Current smoker 1,630 (19%) 820 (19%) 810 (19%)
    Missing 127 126 1
Mobility problem 1,411 (17%) 638 (15%) 773 (19%)
    Missing 270 123 146
General health condition
    Poor 232 (2.7%) 106 (2.4%) 126 (2.9%)
    Fair 1,323 (15%) 612 (14%) 711 (17%)
    Good 3,408 (39%) 1,670 (38%) 1,738 (40%)
    Very good 2,742 (32%) 1,442 (33%) 1,300 (30%)
    Excellent 959 (11%) 536 (12%) 423 (9.8%)
Died by 5 years follow up 648 (7.5%) 354 (8.1%) 294 (6.8%)
    Missing 15 12 4
1 n (%); Mean (SD)
Code
tb = df_svy %>%
  tbl_svysummary(
    by = data_release_cycle,
    include = -c(masked_variance_pseudo_psu, masked_variance_pseudo_stratum, WTMEC4YR,
                 WTMEC4YR_norm),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 2,
    missing_text = "Missing",
  ) %>%
  add_overall() %>% 
  modify_caption("Demographic Characteristics, All Adults") %>% 
  kableExtra::kbl("latex", booktabs = TRUE)

Table 2: Physical Activity

Option 1: by wave and age

Code
# with PA

df_adult = df_accel %>% 
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "All adults")

df_mort = df_mortality %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "50-79y.o") 


df = bind_rows(df_adult, df_mort) 

# df = 
#   df %>% 
#     labelled::set_variable_labels(!!!labs)

df = df %>% 
  rename(Verisense = "mean_vs_steps_15",
         "Verisense rev" = "mean_vs_revised_steps_15",
         Oak = "mean_oak_steps_80",
         ADEPT = "mean_adept_steps",
         SCRF =  "mean_scrf_steps",
         SCSSL =  "mean_scssl_steps",
         MIMS = "mean_PAXMTSM",
         Actilife = "mean_acti_steps",
         log10MIMS = "mean_log10PAXMTSM",
         log10AC = "mean_log10AC",
         AC = "mean_AC") %>% 
  select(!contains("peak"))

df_analysis_svy = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df,
  nest = TRUE
)


df_analysis_svy %>% 
  tbl_strata(
    strata = data_release_cycle,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = c(contains("Verisense"), contains("AC", ignore.case = F),
                                 contains("Oak"), contains("ADEPT"), contains("SCRF"), contains("SCSSL"), contains("MIMS"), contains("actilife")),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**Wave {strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Mean Totals Stratified by Age and Wave")
Physical Activity Mean Totals Stratified by Age and Wave
Characteristic Wave 7, N = 6244 Wave 8, N = 6140
50-79y.o, N = 1,8771 All adults, N = 4,3671 50-79y.o, N = 1,8431 All adults, N = 4,2971
Verisense rev 8,025 (4,422) 9,102 (4,756) 7,553 (4,337) 8,725 (4,730)
Verisense 8,784 (3,885) 9,497 (4,062) 8,374 (3,922) 9,163 (4,065)
AC 2,418,849 (765,529) 2,573,262 (815,192) 2,364,933 (745,731) 2,549,846 (816,305)
log10AC 2,916 (358) 2,955 (366) 2,879 (377) 2,933 (370)
Oak 10,853 (4,837) 11,794 (5,061) 10,256 (4,746) 11,381 (5,065)
ADEPT 2,500 (1,564) 2,659 (1,569) 2,287 (1,687) 2,453 (1,575)
SCRF 10,529 (5,345) 11,509 (5,722) 10,068 (5,311) 11,263 (5,764)
SCSSL 8,814 (4,268) 9,144 (4,400) 8,423 (4,309) 8,846 (4,399)
MIMS 12,823 (3,519) 13,572 (3,758) 12,578 (3,477) 13,467 (3,784)
log10MIMS 976 (150) 998 (154) 966 (154) 994 (155)
Actilife 11,639 (3,851) 12,169 (3,995) 11,237 (3,907) 11,902 (4,048)
1 Mean (SD)
Code
tb = df_analysis_svy %>% 
  tbl_strata(
    strata = data_release_cycle,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = c(contains("Verisense"), contains("AC", ignore.case = F),
                                 contains("Oak"), contains("ADEPT"), contains("SCRF"), contains("SCSSL"), contains("MIMS"), contains("actilife")),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**Wave {strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Mean Totals Stratified by Age and Wave") %>% 
  kableExtra::kbl("latex", booktabs = TRUE)

Option 2: by sex and age

Code
df_analysis_svy %>% 
  tbl_strata(
    strata = gender,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = c(Verisense, "Verisense rev", 
                                 Oak, ADEPT, SCRF, SCSSL, Actilife, MIMS, 
                                 log10MIMS, AC, log10AC),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**{strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Mean Totals Stratified by Age and Sex")
Physical Activity Mean Totals Stratified by Age and Sex
Characteristic Female, N = 6552 Male, N = 5832
50-79y.o, N = 2,0001 All adults, N = 4,5521 50-79y.o, N = 1,7201 All adults, N = 4,1121
Verisense 8,325 (3,737) 8,880 (3,797) 8,878 (4,080) 9,830 (4,292)
Verisense rev 7,523 (4,266) 8,389 (4,437) 8,103 (4,503) 9,496 (5,004)
Oak 10,364 (4,662) 11,124 (4,738) 10,782 (4,949) 12,104 (5,361)
ADEPT 2,124 (1,459) 2,236 (1,372) 2,708 (1,757) 2,913 (1,704)
SCRF 9,760 (5,117) 10,576 (5,277) 10,928 (5,507) 12,284 (6,097)
SCSSL 7,973 (3,953) 8,144 (3,921) 9,373 (4,543) 9,939 (4,702)
Actilife 11,296 (3,739) 11,620 (3,719) 11,606 (4,040) 12,497 (4,289)
MIMS 13,259 (3,523) 13,822 (3,711) 12,053 (3,359) 13,186 (3,809)
log10MIMS 987 (151) 1,000 (151) 952 (151) 991 (159)
AC 2,529,371 (762,724) 2,648,040 (807,335) 2,232,571 (716,344) 2,466,015 (814,509)
log10AC 2,921 (363) 2,940 (358) 2,870 (372) 2,949 (378)
1 Mean (SD)

Supplement: peak values

Code
df_adult = df_accel %>% 
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "All adults")

df_mort = df_mortality %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "50-79y.o") 


df = bind_rows(df_adult, df_mort) 

df_analysis_svy = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df,
  nest = TRUE
)
df_analysis_svy %>% 
  tbl_strata(
    strata = gender,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = contains("peak1"),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**{strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Peak 1 Min Values Stratified by Age and Sex")
Physical Activity Peak 1 Min Values Stratified by Age and Sex
Characteristic Female, N = 6552 Male, N = 5832
50-79y.o, N = 2,0001 All adults, N = 4,5521 50-79y.o, N = 1,7201 All adults, N = 4,1121
peak1_AC 12,348 (2,710) 13,379 (3,459) 12,056 (3,418) 13,383 (3,867)
peak1_PAXMTSM 54 (11) 59 (16) 55 (16) 62 (20)
peak1_acti_steps 71 (18) 74 (18) 82 (18) 84 (17)
peak1_adept_steps 63 (26) 66 (25) 68 (23) 70 (21)
peak1_log10AC 4.08 (0.08) 4.11 (0.10) 4.06 (0.11) 4.10 (0.11)
peak1_log10PAXMTSM 1.73 (0.08) 1.76 (0.10) 1.73 (0.10) 1.77 (0.12)
peak1_oak_steps_80 95 (18) 98 (18) 97 (15) 100 (14)
peak1_scrf_steps 102 (14) 104 (14) 101 (11) 104 (11)
peak1_scssl_steps 101 (17) 103 (17) 101 (15) 104 (15)
peak1_vs_revised_steps_15 87 (20) 91 (20) 91 (18) 95 (17)
peak1_vs_steps_15 82 (20) 84 (19) 86 (17) 88 (16)
1 Mean (SD)
Code
df_analysis_svy %>% 
  tbl_strata(
    strata = gender,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = contains("peak30"),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**{strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Peak 30 Min Values Stratified by Age and Sex")
Physical Activity Peak 30 Min Values Stratified by Age and Sex
Characteristic Female, N = 6552 Male, N = 5832
50-79y.o, N = 2,0001 All adults, N = 4,5521 50-79y.o, N = 1,7201 All adults, N = 4,1121
peak30_AC 8,957 (1,693) 9,618 (2,216) 8,424 (2,111) 9,255 (2,381)
peak30_PAXMTSM 40 (7) 43 (10) 39 (10) 43 (12)
peak30_acti_steps 47 (13) 49 (13) 56 (16) 58 (15)
peak30_adept_steps 31 (19) 32 (18) 37 (18) 38 (17)
peak30_log10AC 3.94 (0.08) 3.96 (0.10) 3.91 (0.10) 3.94 (0.10)
peak30_log10PAXMTSM 1.60 (0.07) 1.63 (0.09) 1.58 (0.09) 1.62 (0.10)
peak30_oak_steps_80 66 (18) 69 (18) 71 (18) 74 (17)
peak30_scrf_steps 79 (18) 82 (18) 82 (16) 85 (15)
peak30_scssl_steps 76 (20) 79 (20) 80 (18) 84 (18)
peak30_vs_revised_steps_15 58 (19) 62 (20) 64 (20) 68 (19)
peak30_vs_steps_15 53 (17) 55 (17) 59 (18) 61 (16)
1 Mean (SD)

Single variable mortality prediction

Table 3 or supplement since duplicative of figure

Code
vlabs = c(
      "Age (yrs)",
      "Diabetes",
      "Mobility problem",
      "Cancer",
      "Alcohol use",
      "BMI Category",
      "Education category",
      "Smoking status",
      "Congenital Heart Disease",
      "Coronary Heart Failure",
      "General Health Condition",
      "Heart attack",
      "AC",
      "Actilife steps",
      "ADEPT steps",
      "log10 AC",
      "log10 MIMS",
      "Oak steps",
      "MIMS",
      "Stepcount RF steps",
      "Stepcount SSL steps",
      "Verisense rev. steps",
      "Verisense steps",
      "Peak30 AC",
      "Peak30 Actilife steps",
      "Peak30 ADEPT steps",
      "Peak30 log10 AC",
      "Peak30 log10 MIMS",
      "Peak30 Oak steps",
      "Peak30 MIMS",
      "Peak30 Stepcount RF steps",
      "Peak30 Stepcount SSL steps",
      "Peak30 Verisense rev. steps",
      "Peak30 Verisense steps",
      "Peak30 AC",
      "Peak30 Actilife steps",
      "Peak30 ADEPT steps",
      "Peak30 log10 AC",
      "Peak30 log10 MIMS",
      "Peak30 Oak steps",
      "Peak30 MIMS",
      "Peak30 Stepcount RF steps",
      "Peak30 Stepcount SSL steps",
      "Peak30 Verisense rev. steps",
      "Peak30 Verisense steps",
      "Race/ethnicity",
      "Stroke"
    )
data_summary = wt_single_male %>%
  group_by(variable) %>%
  mutate(ind = row_number()) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable,
    labels = vlabs
  ))

df_means =
  df_mortality_win %>%
  filter(gender == "Male") %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("mean"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = wt_single_male %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = wt_single_male %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         mean_alive = died_0,
         mean_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = mean_alive,
    "Mean value among deceased" = mean_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "Male") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Male
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
Peak30 Verisense rev. steps 0.731 (0.726, 0.735) 65 47 ---
Peak30 Actilife steps 0.727 (0.722, 0.732) 57 44 0.128
Peak30 Verisense rev. steps 0.726 (0.721, 0.730) 92 74 0.072
ADEPT steps 0.724 (0.718, 0.730) 2698 1534 0.051
Peak30 Stepcount RF steps 0.719 (0.714, 0.725) 83 66 <0.001
Peak30 Oak steps 0.719 (0.714, 0.724) 71 55 <0.001
Peak30 AC 0.719 (0.714, 0.724) 8440 6991 <0.001
Peak30 Verisense steps 0.718 (0.713, 0.724) 60 45 <0.001
Peak30 MIMS 0.718 (0.713, 0.723) 39 33 <0.001
Peak30 log10 AC 0.717 (0.712, 0.722) 4 4 <0.001
Peak30 log10 MIMS 0.716 (0.711, 0.721) 2 2 <0.001
Peak30 ADEPT steps 0.715 (0.710, 0.721) 37 24 <0.001
Peak30 Actilife steps 0.715 (0.710, 0.720) 83 67 <0.001
Verisense rev. steps 0.711 (0.705, 0.716) 8371 4973 <0.001
Oak steps 0.709 (0.703, 0.715) 11100 7122 <0.001
Verisense steps 0.707 (0.701, 0.713) 9198 5962 <0.001
Peak30 Stepcount SSL steps 0.707 (0.702, 0.713) 81 64 <0.001
Peak30 log10 AC 0.706 (0.702, 0.711) 4 4 <0.001
Peak30 AC 0.706 (0.701, 0.711) 12152 10027 <0.001
Peak30 log10 MIMS 0.705 (0.700, 0.710) 2 2 <0.001
Stepcount RF steps 0.705 (0.699, 0.712) 11227 7036 <0.001
Peak30 Stepcount RF steps 0.705 (0.700, 0.710) 102 90 <0.001
Peak30 MIMS 0.704 (0.699, 0.709) 55 45 <0.001
Peak30 Verisense steps 0.703 (0.698, 0.708) 87 71 <0.001
Actilife steps 0.701 (0.695, 0.707) 12017 8726 <0.001
Peak30 Oak steps 0.694 (0.689, 0.700) 98 82 <0.001
Peak30 Stepcount SSL steps 0.690 (0.684, 0.695) 102 89 <0.001
Peak30 ADEPT steps 0.689 (0.683, 0.694) 68 50 <0.001
Stepcount SSL steps 0.684 (0.678, 0.690) 9544 6278 <0.001
AC 0.678 (0.672, 0.684) 2315448 1788175 <0.001
General Health Condition 0.678 (0.673, 0.683) --- --- <0.001
MIMS 0.672 (0.666, 0.678) 12458 9950 <0.001
Mobility problem 0.665 (0.660, 0.670) --- --- <0.001
Age (yrs) 0.660 (0.655, 0.665) --- --- <0.001
log10 MIMS 0.641 (0.635, 0.646) 974 863 <0.001
log10 AC 0.633 (0.627, 0.638) 2928 2665 <0.001
Smoking status 0.590 (0.585, 0.595) --- --- <0.001
Alcohol use 0.577 (0.572, 0.582) --- --- <0.001
Diabetes 0.572 (0.568, 0.576) --- --- <0.001
Education category 0.546 (0.541, 0.550) --- --- <0.001
Coronary Heart Failure 0.543 (0.541, 0.546) --- --- <0.001
Cancer 0.543 (0.539, 0.547) --- --- <0.001
Congenital Heart Disease 0.541 (0.538, 0.544) --- --- <0.001
BMI Category 0.535 (0.530, 0.541) --- --- <0.001
Heart attack 0.535 (0.532, 0.538) --- --- <0.001
Stroke 0.519 (0.517, 0.521) --- --- <0.001
Race/ethnicity 0.495 (0.493, 0.498) --- --- <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance
Code
data_summary = wt_single_female %>%
  group_by(variable) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable,
    labels = vlabs))

df_means =
  df_mortality_win %>%
  filter(gender == "Female") %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("mean"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = wt_single_female %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = wt_single_female %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         mean_alive = died_0,
         mean_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = mean_alive,
    "Mean value among deceased" = mean_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "Female") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Female
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
Stepcount RF steps 0.779 (0.775, 0.783) 10038 5828 ---
Peak30 Stepcount RF steps 0.761 (0.756, 0.766) 80 61 <0.001
Peak30 Actilife steps 0.753 (0.748, 0.758) 48 38 <0.001
Peak30 MIMS 0.749 (0.745, 0.754) 54 46 <0.001
ADEPT steps 0.748 (0.744, 0.753) 2125 1186 <0.001
Peak30 Verisense rev. steps 0.748 (0.744, 0.753) 89 70 <0.001
Peak30 log10 MIMS 0.748 (0.744, 0.753) 2 2 <0.001
Peak30 AC 0.748 (0.744, 0.752) 12414 10497 <0.001
Peak30 Stepcount RF steps 0.748 (0.743, 0.753) 103 89 <0.001
Peak30 log10 AC 0.748 (0.743, 0.752) 4 4 <0.001
Oak steps 0.747 (0.743, 0.751) 10623 6999 <0.001
Stepcount SSL steps 0.747 (0.743, 0.751) 8126 5160 <0.001
Peak30 Verisense rev. steps 0.747 (0.742, 0.752) 59 42 <0.001
Peak30 Actilife steps 0.746 (0.741, 0.750) 72 57 <0.001
Peak30 Oak steps 0.745 (0.740, 0.750) 67 51 <0.001
Peak30 Stepcount SSL steps 0.745 (0.740, 0.750) 77 57 <0.001
Peak30 Stepcount SSL steps 0.742 (0.738, 0.747) 102 85 <0.001
Peak30 MIMS 0.742 (0.738, 0.747) 40 35 <0.001
Peak30 log10 MIMS 0.741 (0.737, 0.746) 2 2 <0.001
Peak30 AC 0.741 (0.736, 0.745) 8973 7576 <0.001
Peak30 log10 AC 0.740 (0.735, 0.744) 4 4 <0.001
Verisense rev. steps 0.739 (0.735, 0.744) 7717 4757 <0.001
Peak30 Oak steps 0.739 (0.735, 0.744) 96 79 <0.001
Peak30 Verisense steps 0.738 (0.733, 0.743) 54 40 <0.001
Peak30 ADEPT steps 0.736 (0.731, 0.740) 31 20 <0.001
Verisense steps 0.735 (0.730, 0.739) 8613 5788 <0.001
Peak30 Verisense steps 0.730 (0.725, 0.735) 83 66 <0.001
Actilife steps 0.727 (0.723, 0.731) 11593 8758 <0.001
Peak30 ADEPT steps 0.716 (0.711, 0.721) 64 44 <0.001
Age (yrs) 0.697 (0.692, 0.703) --- --- <0.001
Mobility problem 0.695 (0.691, 0.700) --- --- <0.001
AC 0.694 (0.689, 0.699) 2575014 2041764 <0.001
MIMS 0.689 (0.684, 0.693) 13474 11056 <0.001
log10 MIMS 0.649 (0.643, 0.654) 998 910 <0.001
General Health Condition 0.640 (0.635, 0.646) --- --- <0.001
log10 AC 0.626 (0.620, 0.631) 2949 2761 <0.001
Diabetes 0.615 (0.610, 0.620) --- --- <0.001
Education category 0.592 (0.587, 0.597) --- --- <0.001
Smoking status 0.587 (0.582, 0.592) --- --- <0.001
Congenital Heart Disease 0.559 (0.556, 0.562) --- --- <0.001
Heart attack 0.558 (0.555, 0.561) --- --- <0.001
Coronary Heart Failure 0.556 (0.553, 0.559) --- --- <0.001
Stroke 0.551 (0.548, 0.554) --- --- <0.001
Cancer 0.535 (0.531, 0.539) --- --- <0.001
Alcohol use 0.524 (0.518, 0.529) --- --- <0.001
Race/ethnicity 0.523 (0.520, 0.526) --- --- <0.001
BMI Category 0.503 (0.498, 0.509) --- --- <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance
Code
vlabs = c("Age (yrs)", "Diabetes", 
                      "Mobility problem", "Cancer",
                      "Alcohol use", "BMI Category", "Education category",
                      "Smoking status", "Congenital Heart Disease", "Coronary Heart Failure", 
                      "General Health Condition", "Heart attack", "AC", "Actilife steps", "ADEPT steps", "log10 AC", "log10 MIMS", "Oak steps", "MIMS", "Stepcount RF steps", "Stepcount SSL steps", "Verisense rev. steps", "Verisense steps", "Race/ethnicity", "Stroke")

  data_summary = wt_single_male %>%
    filter(!grepl("peak", variable)) %>% 
    group_by(variable) %>%
    mutate(ind = row_number()) %>%
    # filter(ind <= 1000) %>%
    summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>%
    mutate(variable_fac = factor(variable,
           labels = vlabs))
  
  df_means = 
    df_mortality_win %>% 
    filter(gender == "Male") %>% 
    group_by(mortstat) %>% 
    summarize(across(c(contains("mean"), age_in_years_at_screening), ~mean(.x))) %>% 
    pivot_longer(cols = -mortstat) %>% 
    pivot_wider(names_from = mortstat, values_from = value, names_prefix = 'died_') %>% 
    rename(variable = name)
  
  # get the best variable based on concodance
  best_var = data_summary %>%
    arrange(desc(concordance_mean)) %>% 
    slice(1) %>%
    pull(variable)

  best_var_vec = wt_single_male %>%
    filter(variable == best_var) %>%
    pull(concordance)

  t_tests = wt_single_male %>%
    group_by(variable) %>%
    filter(variable != best_var) %>%
    nest() %>%
    mutate(t_test = map(data, ~ t.test(.x$concordance, best_var_vec, var.eq = FALSE, paired = FALSE, alternative = "less")),
           res = map(t_test, tidy)) %>%
    unnest(res) %>%
    ungroup() %>%
    select(variable, p.value)

  # join t tests with summary data and make table
   tb = data_summary %>%
    left_join(t_tests) %>%
    left_join(df_means) %>% 
    arrange(desc(concordance_mean)) %>% 
    mutate(variable_fac = forcats::fct_reorder(variable_fac, concordance_mean), 
          lb = concordance_mean - 1.96 * concordance_se,
           ub = concordance_mean + 1.96 * concordance_se,
           ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")")) %>%
    select(variable_fac, ci, p.value, mean_alive = died_0, mean_deceased = died_1) %>%
    mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
    mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
           across(starts_with("mean"), ~if_else(is.na(.x), "---", as.character(round(.x, 0))))) %>% 
    select(-p.value) %>% 
    rename(Variable = variable_fac,
           "Mean (95% CI)" = ci,
           "p-value" = pvalue,
           "Mean value among alive" = mean_alive,
           "Mean value among deceased" = mean_deceased) %>%
    gt::gt() %>%
    gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance",
               subtitle = "Male") %>%
    tab_footnote(
      footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
      locations = cells_column_labels(columns = "p-value")
    ) %>%
    cols_align(columns = everything(), align = "left") %>% 
    as.data.frame() %>% 
    kableExtra::kbl("latex", booktabs = TRUE)

  
  data_summary = wt_single_female %>%
    filter(!grepl("peak", variable)) %>% 
    group_by(variable) %>%
    summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>%
    mutate(variable_fac = factor(variable,
           labels = vlabs))
  
  df_means = 
    df_mortality_win %>% 
    filter(gender == "Female") %>% 
    group_by(mortstat) %>% 
    summarize(across(c(contains("mean"), contains("peak"), 
                       age_in_years_at_screening), ~mean(.x))) %>% 
    pivot_longer(cols = -mortstat) %>% 
    pivot_wider(names_from = mortstat, values_from = value, names_prefix = 'died_') %>% 
    rename(variable = name)
  
  # get the best variable based on concodance
  best_var = data_summary %>%
    arrange(desc(concordance_mean)) %>% 
    slice(1) %>%
    pull(variable)

  best_var_vec = wt_single_female %>%
    filter(variable == best_var) %>%
    pull(concordance)

  t_tests = wt_single_female %>%
    group_by(variable) %>%
    filter(variable != best_var) %>%
    nest() %>%
    mutate(t_test = map(data, ~ t.test(.x$concordance, best_var_vec, var.eq = FALSE, paired = FALSE, alternative = "less")),
           res = map(t_test, tidy)) %>%
    unnest(res) %>%
    ungroup() %>%
    select(variable, p.value)

  # join t tests with summary data and make table
  tb = data_summary %>%
    left_join(t_tests) %>%
    left_join(df_means) %>% 
    arrange(desc(concordance_mean)) %>% 
    mutate(variable_fac = forcats::fct_reorder(variable_fac, concordance_mean), 
          lb = concordance_mean - 1.96 * concordance_se,
           ub = concordance_mean + 1.96 * concordance_se,
           ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")")) %>%
    select(variable_fac, ci, p.value, mean_alive = died_0, mean_deceased = died_1) %>%
    mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
    mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
           across(starts_with("mean"), ~if_else(is.na(.x), "---", as.character(round(.x, 0))))) %>% 
    select(-p.value) %>% 
    rename(Variable = variable_fac,
           "Mean (95% CI)" = ci,
           "p-value" = pvalue,
           "Mean value among alive" = mean_alive,
           "Mean value among deceased" = mean_deceased) %>%
    gt::gt() %>%
    gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance",
               subtitle = "Female") %>%
    tab_footnote(
      footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
      locations = cells_column_labels(columns = "p-value")
    ) %>%
    cols_align(columns = everything(), align = "left") %>% 
    as.data.frame() %>% 
    kableExtra::kbl("latex", booktabs = TRUE)

Multivariable mortality prediction - concordance and model summaries

Still trying to figure out how to present this

Code
wt_male_small = 
  wt_male %>% 
  filter(model %in% c("Demo only", "adept_steps+PAXMTSM", "adept_steps", "PAXMTSM"))

summary = 
  wt_male_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics + ADEPT",
                                              "Demographics + ADEPT + MIMS", 
                                              "Demographics only", 
                                              "Demographics + MIMS")))

coef_adept = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_adept_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "adept_steps")
  
coef_mims = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_PAXMTSM + mean_adept_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "adept_steps+PAXMTSM")
  
coefs = bind_rows(coef_adept, coef_mims)
summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison",
               subtitle = "Male") %>%
    cols_align(columns = everything(), align = "left")
Multivariable Model Comparison
Male
Model Steps HR Steps p-value Model concordance
Demographics only --- NA 0.730 (0.725, 0.736)
Demographics + MIMS --- NA 0.736 (0.730, 0.741)
Demographics + ADEPT 0.902 (0.841, 0.967) 0.0777 0.735 (0.729, 0.740)
Demographics + ADEPT + MIMS 0.940 (0.870, 1.016) 0.2705 0.735 (0.730, 0.740)
Code
tb = 
  summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison",
               subtitle = "Male") %>%
    cols_align(columns = everything(), align = "left") %>% 
  data.frame() %>%
  kableExtra::kbl(format = "latex")
Code
wt_female_small = 
  wt_female %>% 
  filter(model %in% c("Demo only", "scrf_steps+PAXMTSM", "scrf_steps", "PAXMTSM"))

summary = 
  wt_female_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics only",
                                              "Demographics + MIMS", 
                                              "Demographics + Stepcount RF",
                                              "Demographics + Stepcount RF + MIMS")))

coef_scrf = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_scrf_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrf_steps")
  
coef_mims = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_PAXMTSM + mean_scrf_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrf_steps+PAXMTSM")
  
coefs = bind_rows(coef_scrf, coef_mims)
summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison",
               subtitle = "Female") %>%
    cols_align(columns = everything(), align = "left")
Multivariable Model Comparison
Female
Model Steps HR Steps p-value Model concordance
Demographics only --- NA 0.775 (0.771, 0.779)
Demographics + MIMS --- NA 0.780 (0.776, 0.783)
Demographics + Stepcount RF 0.928 (0.902, 0.953) 6.50e-08 0.796 (0.793, 0.800)
Demographics + Stepcount RF + MIMS 0.895 (0.861, 0.930) 1.91e-05 0.795 (0.791, 0.799)
Code
tb = 
  summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison",
               subtitle = "Female") %>%
    cols_align(columns = everything(), align = "left") %>% 
  data.frame() %>% 
  kableExtra::kbl(format = "latex")
Code
model_steps_vs = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_vs_revised_steps_15 + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .)

model_steps_vs %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_mims = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_vs_mims = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_vs_revised_steps_15 + mean_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_vs_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_vs_ac = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_vs_revised_steps_15 + mean_AC + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_vs_ac %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 
Code
make_multivar_tab(wt_female, subt = "Female")
Code
model_steps_sc = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_scrf_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .)

model_steps_sc %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_mims = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_mims = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_scrf_steps + mean_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_ac = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_scrf_steps + mean_AC + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_ac %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

With step cadence

Code
wt_male_cad = readRDS(here::here("results", "metrics_wtd_100_male_cadence.rds"))


wt_male_small = 
  wt_male_cad %>% 
  filter(model %in% c("vs_revised_steps_15", "vs_revised_steps_15+peak1_vs_revised_steps_15",
                      "peak1_vs_revised_steps_15"))

summary = 
  wt_male_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics + Peak1 Cadence", 
                                                "Demographics + Total Steps", 
                                                "Demographics + Steps + Peak1 Cadence")))

coef_steps = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_vs_revised_steps_15 + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         step_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "vs_revised_steps_15") %>% 
  rename(step_p = p.value) %>% 
  select(model, step_ci, step_p)

  
coef_cad = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_vs_revised_steps_15 + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate *10), 
         lb = exp((estimate - 1.96 * std.error)*10 ),
         ub = exp((estimate + 1.96 * std.error)*10),
         cad_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "peak1_vs_revised_steps_15") %>% 
  rename(cad_p = p.value) %>% 
  select(model, cad_ci, cad_p)


coef_both = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ peak1_vs_revised_steps_15 + mean_vs_revised_steps_15 +
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term))
  
coef_cad_steps = 
  coef_both %>% 
  filter(grepl("peak", term)) %>% 
  mutate(hr = exp(estimate*10), 
         lb = exp((estimate - 1.96 * std.error)*10 ),
         ub = exp((estimate + 1.96 * std.error)*10),
         cad_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "vs_revised_steps_15+peak1_vs_revised_steps_15") %>% 
  rename(cad_p = p.value)

coef_cad_steps2 = 
  coef_both %>% 
  filter(!grepl("peak", term)) %>% 
  mutate(hr = exp(estimate *500), 
         lb = exp((estimate - 1.96 * std.error) *500),
         ub = exp((estimate + 1.96 * std.error)*500 ),
         step_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "vs_revised_steps_15+peak1_vs_revised_steps_15") %>% 
  rename(step_p = p.value)

coef2 = coef_cad_steps %>% 
  left_join(coef_cad_steps2, by = "model") %>% 
  select(model, cad_ci, cad_p, step_ci, step_p) %>% 
  bind_rows(coef_steps) %>% 
  bind_rows(coef_cad)


summary %>% 
  left_join(coef2)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         across(contains("_p"), ~format.pval(.x, digits = 3))) %>% 
  select(model_fac, step_ci, step_p, cad_ci, cad_p, conc_ci)   %>% 
  rename(Model = model_fac,
         "Steps HR" = step_ci,
         "Steps p-value" = step_p,
         "Cadence HR" = cad_ci,
         "Cadence p-value" = cad_p,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Comparison of Addition of Cadence Variables",
               subtitle = "Male") %>%
    cols_align(columns = everything(), align = "left")
Comparison of Addition of Cadence Variables
Male
Model Steps HR Steps p-value Cadence HR Cadence p-value Model concordance
Demographics + Peak1 Cadence NA NA 0.999 (0.999, 0.999) 0.00307 0.741 (0.736, 0.746)
Demographics + Total Steps 0.952 (0.929, 0.975) 0.00307 NA NA 0.739 (0.734, 0.745)
Demographics + Steps + Peak1 Cadence 0.973 (0.945, 1.003) 0.20468 0.877 (0.787, 0.977) 0.07502 0.741 (0.735, 0.746)
Code
wt_female_cad = readRDS(here::here("results", "metrics_wtd_100_female_cadence.rds"))


wt_female_small = 
  wt_female_cad %>% 
  filter(model %in% c("scrf_steps", "scrf_steps+peak1_scrf_steps",
                      "peak1_scrf_steps"))

summary = 
  wt_female_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics + Peak1 Cadence", 
                                                "Demographics + Total Steps", 
                                                "Demographics + Steps + Peak1 Cadence")))

coef_steps = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_scrf_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         step_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrf_steps") %>% 
  rename(step_p = p.value) %>% 
  select(model, step_ci, step_p)

  
coef_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_scrf_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate *10), 
         lb = exp((estimate - 1.96 * std.error)*10 ),
         ub = exp((estimate + 1.96 * std.error)*10),
         cad_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "peak1_scrf_steps") %>% 
  rename(cad_p = p.value) %>% 
  select(model, cad_ci, cad_p)


coef_both = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ peak1_scrf_steps + mean_scrf_steps +
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term))
  
coef_cad_steps = 
  coef_both %>% 
  filter(grepl("peak", term)) %>% 
  mutate(hr = exp(estimate*10), 
         lb = exp((estimate - 1.96 * std.error)*10 ),
         ub = exp((estimate + 1.96 * std.error)*10),
         cad_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrf_steps+peak1_scrf_steps") %>% 
  rename(cad_p = p.value)

coef_cad_steps2 = 
  coef_both %>% 
  filter(!grepl("peak", term)) %>% 
  mutate(hr = exp(estimate *500), 
         lb = exp((estimate - 1.96 * std.error) *500),
         ub = exp((estimate + 1.96 * std.error)*500 ),
         step_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrf_steps+peak1_scrf_steps") %>% 
  rename(step_p = p.value)

coef2 = coef_cad_steps %>% 
  left_join(coef_cad_steps2, by = "model") %>% 
  select(model, cad_ci, cad_p, step_ci, step_p) %>% 
  bind_rows(coef_steps) %>% 
  bind_rows(coef_cad)


summary %>% 
  left_join(coef2)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         across(contains("_p"), ~format.pval(.x, digits = 3))) %>% 
  select(model_fac, step_ci, step_p, cad_ci, cad_p, conc_ci)   %>% 
  rename(Model = model_fac,
         "Steps HR" = step_ci,
         "Steps p-value" = step_p,
         "Cadence HR" = cad_ci,
         "Cadence p-value" = cad_p,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Comparison of Addition of Cadence Variables",
               subtitle = "Female") %>%
    cols_align(columns = everything(), align = "left")
Comparison of Addition of Cadence Variables
Female
Model Steps HR Steps p-value Cadence HR Cadence p-value Model concordance
Demographics + Peak1 Cadence NA NA 0.998 (0.998, 0.999) 6.5e-08 0.783 (0.779, 0.786)
Demographics + Total Steps 0.928 (0.902, 0.953) 6.50e-08 NA NA 0.796 (0.793, 0.800)
Demographics + Steps + Peak1 Cadence 0.930 (0.898, 0.964) 7.29e-05 0.980 (0.834, 1.151) 0.812 0.795 (0.792, 0.799)
Code
wt_male_cad = readRDS(here::here("results", "metrics_wtd_100_male_cadence.rds"))
make_multivar_tab(wt_male_cad, subt = "Male")

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_vs_revised_steps_15 + peak1_vs_revised_steps_15 + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("revised", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_vs_revised_steps_15 + peak30_vs_revised_steps_15 +
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 


model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("revised", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 
Code
wt_female_cad = readRDS(here::here("results", "metrics_wtd_100_female_cadence.rds"))
make_multivar_tab(wt_female_cad, subt = "Female")

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_scrf_steps + peak1_scrf_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("scrf", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ mean_scrf_steps + peak30_scrf_steps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 


model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("scrf", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

Sensivity analysis

Use anyone with at least one valid day, compute results

Code
sens_male_single = readRDS(here::here("results", "metrics_wtd_100_singlevar_male_sens.rds")) 
sens_female_single = readRDS(here::here("results", "metrics_wtd_100_singlevar_female_sens.rds")) 

df_mortality_sens =
  df_all %>%
  filter(num_valid_days > 0) %>%
  filter(age_in_years_at_screening >= 50 &
           age_in_years_at_screening < 80)  %>% 
  filter(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm, mean_PAXMTSM),
                ~!is.na(.x))) %>%
  mutate(event_time = permth_exm / 12) %>% 
  ungroup() %>%
  mutate(across(c(contains("mean"), contains("peak")), ~DescTools::Winsorize(.x, probs = c(0, 0.99)))) 

data_summary = sens_male_single %>%
  group_by(variable) %>%
  mutate(ind = row_number()) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable
    # labels = vlabs
  ))

df_means =
  df_mortality_sens %>%
  filter(gender == "Male") %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("mean"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = sens_male_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = sens_male_single %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         mean_alive = died_0,
         mean_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = mean_alive,
    "Mean value among deceased" = mean_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "Male") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Male
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
mean_adept_steps 0.722 (0.716, 0.728) 2698 1534 ---
peak30_vs_revised_steps_15 0.721 (0.716, 0.726) 65 47 0.361
peak30_AC 0.719 (0.714, 0.724) 8440 6991 0.171
peak30_acti_steps 0.717 (0.712, 0.722) 57 44 0.100
peak30_log10AC 0.717 (0.712, 0.722) 4 4 0.086
peak30_PAXMTSM 0.716 (0.711, 0.721) 39 33 0.064
peak30_log10PAXMTSM 0.715 (0.709, 0.720) 2 2 0.026
peak1_vs_revised_steps_15 0.713 (0.708, 0.717) 92 74 0.005
peak30_scrf_steps 0.712 (0.707, 0.717) 83 66 0.006
peak30_oak_steps_80 0.712 (0.707, 0.717) 71 55 0.005
peak30_vs_steps_15 0.711 (0.706, 0.716) 60 45 0.002
mean_oak_steps_80 0.709 (0.704, 0.715) 11100 7122 <0.001
peak30_adept_steps 0.708 (0.703, 0.714) 37 24 <0.001
mean_vs_revised_steps_15 0.708 (0.703, 0.714) 8371 4973 <0.001
peak1_log10AC 0.708 (0.703, 0.712) 4 4 <0.001
peak1_AC 0.707 (0.702, 0.712) 12152 10027 <0.001
mean_vs_steps_15 0.707 (0.701, 0.712) 9198 5962 <0.001
mean_scrf_steps 0.706 (0.700, 0.712) 11227 7036 <0.001
peak1_acti_steps 0.703 (0.698, 0.708) 83 67 <0.001
mean_acti_steps 0.701 (0.695, 0.707) 12017 8726 <0.001
peak1_log10PAXMTSM 0.701 (0.695, 0.706) 2 2 <0.001
peak1_PAXMTSM 0.700 (0.695, 0.705) 55 45 <0.001
peak30_scssl_steps 0.700 (0.694, 0.705) 81 64 <0.001
peak1_vs_steps_15 0.693 (0.688, 0.698) 87 71 <0.001
peak1_scrf_steps 0.693 (0.688, 0.698) 102 90 <0.001
mean_scssl_steps 0.685 (0.679, 0.691) 9544 6278 <0.001
peak1_oak_steps_80 0.683 (0.677, 0.688) 98 82 <0.001
mean_AC 0.681 (0.676, 0.687) 2315448 1788175 <0.001
peak1_adept_steps 0.680 (0.675, 0.686) 68 50 <0.001
peak1_scssl_steps 0.678 (0.673, 0.683) 102 89 <0.001
general_health_condition 0.678 (0.672, 0.683) --- --- <0.001
mean_PAXMTSM 0.675 (0.669, 0.681) 12458 9950 <0.001
bin_mobilityproblem 0.663 (0.658, 0.667) --- --- <0.001
age_in_years_at_screening 0.656 (0.651, 0.661) --- --- <0.001
mean_log10PAXMTSM 0.645 (0.639, 0.651) 974 863 <0.001
mean_log10AC 0.636 (0.630, 0.642) 2928 2665 <0.001
cat_smoke 0.593 (0.588, 0.598) --- --- <0.001
bin_diabetes 0.569 (0.565, 0.573) --- --- <0.001
cat_alcohol 0.567 (0.562, 0.573) --- --- <0.001
chf 0.544 (0.541, 0.546) --- --- <0.001
cat_education 0.543 (0.539, 0.548) --- --- <0.001
cancer 0.542 (0.538, 0.546) --- --- <0.001
chd 0.541 (0.538, 0.544) --- --- <0.001
heartattack 0.535 (0.532, 0.538) --- --- <0.001
cat_bmi 0.532 (0.527, 0.537) --- --- <0.001
stroke 0.519 (0.516, 0.521) --- --- <0.001
race_hispanic_origin 0.499 (0.497, 0.502) --- --- <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance
Code
data_summary = sens_female_single %>%
  group_by(variable) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable
    # labels = vlabs
    ))

df_means =
  df_mortality_sens %>%
  filter(gender == "Female") %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("mean"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = sens_female_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = sens_female_single%>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         mean_alive = died_0,
         mean_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = mean_alive,
    "Mean value among deceased" = mean_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "Female") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Female
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
mean_scrf_steps 0.780 (0.776, 0.784) 10038 5828 ---
peak30_scrf_steps 0.765 (0.760, 0.770) 80 61 <0.001
peak30_acti_steps 0.759 (0.754, 0.764) 48 38 <0.001
peak1_vs_revised_steps_15 0.753 (0.749, 0.758) 89 70 <0.001
peak1_scrf_steps 0.753 (0.749, 0.757) 103 89 <0.001
peak1_PAXMTSM 0.752 (0.748, 0.756) 54 46 <0.001
peak30_vs_revised_steps_15 0.752 (0.747, 0.757) 59 42 <0.001
peak1_acti_steps 0.752 (0.747, 0.756) 72 57 <0.001
peak1_log10PAXMTSM 0.751 (0.747, 0.755) 2 2 <0.001
mean_adept_steps 0.751 (0.746, 0.755) 2125 1186 <0.001
mean_scssl_steps 0.749 (0.745, 0.754) 8126 5160 <0.001
peak1_AC 0.749 (0.745, 0.753) 12414 10497 <0.001
peak30_oak_steps_80 0.749 (0.744, 0.754) 67 51 <0.001
peak1_log10AC 0.749 (0.744, 0.753) 4 4 <0.001
peak30_scssl_steps 0.748 (0.743, 0.753) 77 57 <0.001
mean_oak_steps_80 0.748 (0.744, 0.752) 10623 6999 <0.001
peak1_scssl_steps 0.747 (0.743, 0.752) 102 85 <0.001
peak30_PAXMTSM 0.745 (0.740, 0.749) 40 35 <0.001
peak30_log10PAXMTSM 0.744 (0.739, 0.748) 2 2 <0.001
peak1_oak_steps_80 0.744 (0.739, 0.748) 96 79 <0.001
peak30_vs_steps_15 0.743 (0.738, 0.748) 54 40 <0.001
mean_vs_revised_steps_15 0.743 (0.738, 0.747) 7717 4757 <0.001
peak30_AC 0.742 (0.737, 0.746) 8973 7576 <0.001
peak30_log10AC 0.741 (0.736, 0.745) 4 4 <0.001
peak30_adept_steps 0.739 (0.734, 0.743) 31 20 <0.001
mean_vs_steps_15 0.736 (0.732, 0.740) 8613 5788 <0.001
peak1_vs_steps_15 0.736 (0.731, 0.741) 83 66 <0.001
mean_acti_steps 0.727 (0.724, 0.731) 11593 8758 <0.001
peak1_adept_steps 0.720 (0.715, 0.725) 64 44 <0.001
bin_mobilityproblem 0.699 (0.695, 0.703) --- --- <0.001
age_in_years_at_screening 0.695 (0.690, 0.701) --- --- <0.001
mean_AC 0.691 (0.687, 0.696) 2575014 2041764 <0.001
mean_PAXMTSM 0.687 (0.683, 0.692) 13474 11056 <0.001
mean_log10PAXMTSM 0.649 (0.643, 0.654) 998 910 <0.001
general_health_condition 0.647 (0.642, 0.653) --- --- <0.001
mean_log10AC 0.626 (0.621, 0.632) 2949 2761 <0.001
bin_diabetes 0.614 (0.610, 0.618) --- --- <0.001
cat_education 0.597 (0.593, 0.602) --- --- <0.001
cat_smoke 0.585 (0.580, 0.590) --- --- <0.001
chd 0.560 (0.557, 0.563) --- --- <0.001
chf 0.559 (0.556, 0.562) --- --- <0.001
heartattack 0.558 (0.555, 0.561) --- --- <0.001
stroke 0.548 (0.545, 0.551) --- --- <0.001
cat_alcohol 0.542 (0.537, 0.547) --- --- <0.001
cancer 0.534 (0.530, 0.538) --- --- <0.001
race_hispanic_origin 0.524 (0.521, 0.527) --- --- <0.001
cat_bmi 0.496 (0.491, 0.501) --- --- <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance